#!/usr/bin/env python3
# $Id: splashrun 6433 2010-05-11 14:06:29Z wheirman $

import sys, os, time, getopt, tempfile

# Absolute path of the directory containing this script; benchmark
# directories are resolved relative to it.
HOME = os.path.abspath(os.path.dirname(sys.argv[0]))

class UnsupportedParameter(ValueError):
    """Raised when a benchmark cannot run with the requested input size or thread count."""

def mkNewDir(dirname):
    """Create *dirname* as an empty directory, replacing anything already there.

    Any existing file, symlink or directory tree at *dirname* is removed
    first, then the directory (and missing parents) is created.

    The removal is done in-process rather than through a shell ``rm -rf``
    so that paths containing spaces or shell metacharacters are handled
    literally instead of being split or glob-expanded.

    Raises:
        OSError: if the old entry cannot be removed or the new directory
            cannot be created.
    """
    import shutil  # local import keeps this function self-contained

    if os.path.isdir(dirname) and not os.path.islink(dirname):
        # ignore_errors mirrors the "-f" of the former `rm -rf`; a leftover
        # directory still surfaces as an error from makedirs() below.
        shutil.rmtree(dirname, ignore_errors=True)
    elif os.path.lexists(dirname):
        # Plain file, or a symlink (remove the link itself, not its target).
        os.remove(dirname)
    os.makedirs(dirname)

def run(cmd):
    """Run the shell command *cmd* and return its exit code.

    Python's own stdout/stderr buffers are flushed first so that output
    already printed appears before the child's output.

    Returns:
        The command's exit status (0-255) when it exited normally, or
        ``128 + signal number`` when it was terminated by a signal (the
        convention shells use).  Previously a signal-terminated command
        was reported as 0, hiding crashes such as segmentation faults.
    """
    sys.stdout.flush()
    sys.stderr.flush()
    status = os.system(cmd)
    # os.system() returns a wait()-style status on POSIX: the exit code is
    # in the high byte, the terminating signal (if any) in the low bits.
    if os.WIFSIGNALED(status):
        return 128 + os.WTERMSIG(status)
    return status >> 8

class Benchmark:
    """Base description of one runnable benchmark.

    Subclasses supply ``cmdline(args)``, which turns the dict returned by
    ``prepare()`` into the argument string appended to the executable path.
    """

    def __init__(self, name, dir_, cmdname, size2args, max_threads=None, files=None):
        """Store the benchmark configuration.

        Args:
            name: name used in messages.
            dir_: benchmark directory, relative to ``HOME/codes``.
            cmdname: executable file name inside that directory.
            size2args: dict mapping an input-size name to the input value,
                or to a callable that receives the thread count and
                returns the input value.
            max_threads: largest supported thread count, or None/0 for no
                limit.
            files: optional iterable of file names (relative to the
                benchmark directory) copied into the run directory by
                ``prepare()``.
        """
        self.name = name
        self.dir_ = os.path.join(HOME, 'codes', dir_)
        self.cmdname = cmdname
        self.size2args = size2args
        self.max_threads = max_threads
        # A fresh list per instance; avoids sharing one mutable default
        # list between every Benchmark object.
        self.files = list(files) if files is not None else []

    def validate(self, inputsize, nthreads):
        """Check that *inputsize* and *nthreads* are supported.

        Raises:
            UnsupportedParameter: if *nthreads* exceeds ``max_threads`` or
                *inputsize* is not a key of ``size2args``.
        """
        if self.max_threads and nthreads > self.max_threads:
            raise UnsupportedParameter(f"{self.name}: maximum {self.max_threads} threads supported")
        if inputsize not in self.size2args:
            raise UnsupportedParameter(f"{self.name}: input size {inputsize} not supported")

    def prepare(self, inputsize, nthreads, rundir):
        """Validate the parameters, stage extra files and build the argument dict.

        Returns:
            ``{'input': <input value>, 'nthreads': nthreads}`` where the
            input value is looked up in ``size2args`` (and called with
            *nthreads* when it is callable).

        Raises:
            UnsupportedParameter: see ``validate()``.
        """
        self.validate(inputsize, nthreads)
        for f in self.files:
            # Module-level run(), not the method of the same name.
            run(f'cp {self.dir_}/{f} {rundir}')
        inp = self.size2args[inputsize]
        if callable(inp):
            inp = inp(nthreads)
        return { 'input': inp, 'nthreads': nthreads }

    def cmdline(self, args):
        """Return the argument string for *args*; must be overridden by subclasses."""
        raise NotImplementedError(f"{type(self).__name__} must implement cmdline()")

    def run(self, args):
        """Return the full command string: executable path, a space, then ``cmdline(args)``."""
        return os.path.join(self.dir_, self.cmdname) + ' ' + self.cmdline(args)

class BenchmarkPlain(Benchmark):
    """Benchmark whose parameters are passed as command-line arguments."""

    def __init__(self, name, dir_, cmdname, size2args, cmdargs, **kwds):
        """Keep the ``%``-style template *cmdargs*; everything else goes to Benchmark."""
        super().__init__(name, dir_, cmdname, size2args, **kwds)
        self.cmdargs = cmdargs

    def cmdline(self, args):
        """Return the argument string obtained by filling the template from *args*."""
        template = self.cmdargs
        return template % args

class BenchmarkFile(BenchmarkPlain):
    """Command-line benchmark whose input must first be unpacked into the run directory.

    The input value may be a plain string (the file name) or a tuple whose
    first element is the file name; tuple elements are available to the
    argument template as ``%(input[0])s``, ``%(input[1])s``, ...
    """

    def __init__(self, name, dir_, cmdname, size2args, cmdargs, unzip, **kwds):
        """Keep the shell-command template *unzip*; the rest goes to BenchmarkPlain.

        *unzip* is a ``%``-style template filled with the keys ``dir_``,
        ``rundir`` and ``file``.
        """
        super().__init__(name, dir_, cmdname, size2args, cmdargs, **kwds)
        self.unzip = unzip

    def prepare(self, inputsize, nthreads, rundir):
        """Do the base preparation, then run the unpack command for the input.

        Returns the argument dict produced by the base class.
        """
        args = super().prepare(inputsize, nthreads, rundir)
        inp = args['input']
        cmd = self.unzip % {
            'dir_': self.dir_,
            'rundir': rundir,
            'file': inp[0] if isinstance(inp, tuple) else inp
        }
        # The files being unpacked are the benchmark's inputs.
        print('[SPLASH]', 'Extracting input:', cmd)
        run(cmd)
        return args

    def cmdline(self, args):
        """Return the argument string, exposing tuple inputs as ``input[i]`` keys.

        Works on a copy so the caller's *args* dict is left untouched.
        """
        inp = args['input']
        if not isinstance(inp, tuple):
            return self.cmdargs % args
        expanded = dict(args)
        for i, part in enumerate(inp):
            expanded[f'input[{i}]'] = part
        return self.cmdargs % expanded

class BenchmarkStdin(Benchmark):
    """Benchmark that reads its parameters from standard input."""

    def __init__(self, name, dir_, cmdname, size2args, fileargs, **kwds):
        """Keep the ``%``-style template *fileargs* for the stdin contents."""
        super().__init__(name, dir_, cmdname, size2args, **kwds)
        self.fileargs = fileargs

    def prepare(self, inputsize, nthreads, rundir):
        """Do the base preparation and write the filled template to ``<rundir>/input``."""
        params = super().prepare(inputsize, nthreads, rundir)
        input_path = os.path.join(rundir, 'input')
        with open(input_path, 'w') as handle:
            handle.write(self.fileargs % params)
        return params

    def cmdline(self, args):
        """Return the shell redirection that feeds the file named ``input`` to stdin."""
        return '< input'

# Benchmark registry: maps the name accepted by the -p option to a configured
# Benchmark object.  Each constructor call gives, in order: the name used in
# messages, the directory below HOME/codes, the executable name, a dict from
# input-size name to the input value (or to a callable that receives the
# thread count), and a %-style template filled from the 'input' and
# 'nthreads' keys (command-line arguments for BenchmarkPlain/BenchmarkFile,
# stdin contents for BenchmarkStdin).  BenchmarkFile entries additionally give
# the shell command template that unpacks the input into the run directory.
benchmarks = {
  # Applications (directories under apps/)
  'barnes':     BenchmarkStdin('barnes', 'apps/barnes', 'BARNES', { 'test': 1024, 'small': 16384, 'large': 32*1024 }, '\n%(input)u\n123\n\n0.025\n0.05\n1.0\n2.0\n5.0\n0.075\n0.25\n%(nthreads)u\n'),
  'fmm':        BenchmarkStdin('fmm', 'apps/fmm', 'FMM', { 'test': 512, 'tiny': 1024, 'small': 16384, 'large': 32*1024 }, 'two cluster\nplummer\n%(input)u\n1e-6\n%(nthreads)u\n5\n.025\n0.0\ncost zones\n', max_threads = 64),
  'ocean.cont': BenchmarkPlain('ocean.cont', 'apps/ocean/contiguous_partitions', 'OCEAN', { 'test': 18, 'small': 258, 'large': 1026 }, '-n%(input)u -p%(nthreads)u'),
  'ocean.ncont':BenchmarkPlain('ocean.ncont', 'apps/ocean/non_contiguous_partitions', 'OCEAN', { 'test': 18, 'small': 258, 'large': 1026 }, '-n%(input)u -p%(nthreads)u', max_threads = 64),
  'radiosity':  BenchmarkPlain('radiosity', 'apps/radiosity', 'RADIOSITY', { 'test': '', 'small': '-room -ae 5000 -en 0.05 -bf 0.10', 'large': '-room' }, '-p %(nthreads)u %(input)s -batch'),
  'raytrace':   BenchmarkFile('raytrace', 'apps/raytrace', 'RAYTRACE', { 'test': ('teapot', ''), 'small': ('car', '-m64'), 'large': ('car', '-m64 -a4') }, '-p%(nthreads)u %(input[1])s inputs/%(input[0])s.env',
                              'mkdir %(rundir)s/inputs; gunzip < %(dir_)s/inputs/%(file)s.env.Z > %(rundir)s/inputs/%(file)s.env; gunzip < %(dir_)s/inputs/%(file)s.geo.Z > %(rundir)s/inputs/%(file)s.geo'),
  'volrend':    BenchmarkFile('volrend', 'apps/volrend', 'VOLREND', { 'test': 'head-scaleddown4', 'small': 'head-scaleddown2', 'large': 'head' }, '%(nthreads)u %(input)s',
                              'tar xzv -C %(rundir)s -f %(dir_)s/inputs/%(file)s.tgz'),
  'water.nsq':  BenchmarkStdin('water.nsq', 'apps/water-nsquared', 'WATER-NSQUARED', { 'test': 27, 'small': 512, 'large': 2197 }, '  1.5e-16   %(input)u  3   6\n -1      3000     3  0\n%(nthreads)u 6.212752\n',
                               files = [ 'random.in' ]),
  'water.sp':   BenchmarkStdin('water.sp', 'apps/water-spatial', 'WATER-SPATIAL', { 'test': 27, 'small': 512, 'large': 2197 }, '  1.5e-16   %(input)u  3   6\n -1      3000     3  0\n%(nthreads)u 6.212752\n',
                               files = [ '../water-nsquared/random.in' ]),

  # Kernels (directories under kernels/)
  'cholesky':   BenchmarkFile('cholesky', 'kernels/cholesky', 'CHOLESKY', { 'test': 'tk14.O', 'small': 'tk25.O', 'large': 'tk29.O' }, '-p%(nthreads)u %(input)s',
                              'gunzip < %(dir_)s/inputs/%(file)s.Z > %(rundir)s/%(file)s'),
  'fft':        BenchmarkPlain('fft', 'kernels/fft', 'FFT', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u'),
  'lu.cont':    BenchmarkPlain('lu.cont', 'kernels/lu/contiguous_blocks', 'LU', { 'test': 16, 'small': 512, 'large': 1024 }, '-n%(input)u -p%(nthreads)u'),
  'lu.ncont':   BenchmarkPlain('lu.ncont', 'kernels/lu/non_contiguous_blocks', 'LU', { 'test': 16, 'small': 512, 'large': 1024 }, '-n%(input)u -p%(nthreads)u'),
  'radix':      BenchmarkPlain('radix', 'kernels/radix', 'RADIX', { 'test': 16384, 'tiny': 65536, 'small': 262144, 'large': 1048576 }, '-n%(input)u -p%(nthreads)u'),

  # Scaling versions: only the 'small' input size is defined, and its value is
  # computed from the thread count p.  The keys end in '-scale' (which
  # allbenchmarks() uses to list them last) while the name passed to the
  # constructor is the unsuffixed one.
  # NOTE(review): `/` is true division in Python 3, so the arithmetic lambdas
  # return floats; the %(input)u conversion renders them as integers.
  # The water.nsq and fft variants look p up in a table with keys 16, 32 and
  # 64 only; any other thread count raises KeyError.
  'barnes-scale':     BenchmarkStdin('barnes', 'apps/barnes', 'BARNES', { 'small': lambda p: 16384*p/16 }, '\n%(input)u\n123\n\n0.025\n0.05\n1.0\n2.0\n5.0\n0.075\n0.25\n%(nthreads)u\n'),
  'fmm-scale':        BenchmarkStdin('fmm', 'apps/fmm', 'FMM', { 'small': lambda p: 16384*p/16 }, 'two cluster\nplummer\n%(input)u\n1e-6\n%(nthreads)u\n5\n.025\n0.0\ncost zones\n'),
  'ocean.cont-scale': BenchmarkPlain('ocean.cont', 'apps/ocean/contiguous_partitions', 'OCEAN', { 'small': lambda p: 256*p/16+2 }, '-n%(input)u -p%(nthreads)u'),
  'water.nsq-scale':  BenchmarkStdin('water.nsq', 'apps/water-nsquared', 'WATER-NSQUARED', { 'small': lambda p: {16:512,32:1331,64:2197}[p] }, '  1.5e-16   %(input)u  3   6\n -1      3000     3  0\n%(nthreads)u 6.212752\n',
                               files = [ 'random.in' ]),
  'fft-scale':        BenchmarkPlain('fft', 'kernels/fft', 'FFT', { 'small': lambda p: {16:18,32:20,64:22}[p] }, '-m%(input)u -p%(nthreads)u'),
  'lu.cont-scale':    BenchmarkPlain('lu.cont', 'kernels/lu/contiguous_blocks', 'LU', { 'small': lambda p: 512*p/16 }, '-n%(input)u -p%(nthreads)u'),
  'lu.ncont-scale':   BenchmarkPlain('lu.ncont', 'kernels/lu/non_contiguous_blocks', 'LU', { 'small': lambda p: 512*p/16 }, '-n%(input)u -p%(nthreads)u'),
  'radix-scale':      BenchmarkPlain('radix', 'kernels/radix', 'RADIX', { 'small': lambda p: 262144*p/16 }, '-n%(input)u -p%(nthreads)u'),

  # Same configurations as 'fft' / 'raytrace' but with different executable
  # names (FFT-O0 .. FFT-O3, RAYTRACE.opt).
  # NOTE(review): presumably builds at different optimisation levels - confirm.
  'fft_O0':   	BenchmarkPlain('fft', 'kernels/fft', 'FFT-O0', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u'),
  'fft_O1':   	BenchmarkPlain('fft', 'kernels/fft', 'FFT-O1', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u'),
  'fft_O2':   	BenchmarkPlain('fft', 'kernels/fft', 'FFT-O2', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u'),
  'fft_O3':   	BenchmarkPlain('fft', 'kernels/fft', 'FFT-O3', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u'),
  'raytrace_opt': BenchmarkFile('raytrace', 'apps/raytrace', 'RAYTRACE.opt', { 'test': ('teapot', ''), 'small': ('car', '-m64'), 'large': ('car', '-m64 -a4') }, '-p%(nthreads)u %(input[1])s inputs/%(input[0])s.env',
                                'mkdir %(rundir)s/inputs; gunzip < %(dir_)s/inputs/%(file)s.env.Z > %(rundir)s/inputs/%(file)s.env; gunzip < %(dir_)s/inputs/%(file)s.geo.Z > %(rundir)s/inputs/%(file)s.geo'),

  # The regular FFT executable with an extra -r<N> argument.
  # NOTE(review): presumably a repetition count - confirm against the FFT source.
  'fft_rep2': BenchmarkPlain('fft', 'kernels/fft', 'FFT', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u -r2'),
  'fft_forever': BenchmarkPlain('fft', 'kernels/fft', 'FFT', { 'test': 10, 'tiny': 14, 'small': 18, 'large': 22 }, '-m%(input)u -p%(nthreads)u -r1000000'),
}


def allbenchmarks():
    """Return every benchmark name: regular ones sorted first, then the '-scale' ones sorted."""
    regular = []
    scaled = []
    for name in benchmarks:
        bucket = scaled if name.endswith('-scale') else regular
        bucket.append(name)
    return sorted(regular) + sorted(scaled)

if __name__ == '__main__':
    # Command-line driver: parse the options, then for each selected
    # benchmark set up a run directory, run the benchmark through the submit
    # program and clean up.  The exit status is that of the first failing run.
    import shutil  # only needed here, for temp-dir cleanup

    def usage():
        """Print the command-line synopsis and the list of known benchmarks."""
        print('SPLASH-2 run program, similar to PARSEC `parsecmgmt -a run`')
        print('Usage:')
        print('  %s  -p <program (all)>  -i <inputsize (test)>  -n <nthreads (1)>  -s <submit-program (time)>  -d <rundir (temp)>' % sys.argv[0])
        print('Benchmarks:')
        print(' ', ' '.join(allbenchmarks()))

    program = 'all'
    inputsize = 'test'
    nthreads = 1
    submit = 'time'
    rundir = None

    if not sys.argv[1:]:
        usage()
        sys.exit()

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hlp:i:n:s:d:")
    except getopt.GetoptError as e:
        print(e)
        usage()
        sys.exit(2)

    for o, a in opts:
        if o == '-h':
            usage()
            sys.exit()
        elif o == '-l':
            print(' '.join(allbenchmarks()))
            sys.exit()
        elif o == '-p':
            program = a
        elif o == '-i':
            inputsize = a
        elif o == '-n':
            try:
                nthreads = int(a)
            except ValueError:
                nthreads = 0
            if nthreads < 1:
                print('[SPLASH] Invalid thread count:', a)
                usage()
                sys.exit(2)
        elif o == '-s':
            submit = a
        elif o == '-d':
            # Resolve now, before any chdir: the loop below changes into the
            # run directory and then uses this path again, which breaks for
            # a relative path.  An empty value keeps the temp-dir default.
            rundir = os.path.abspath(a) if a else None

    if program == 'all':
        programs = list(benchmarks)
    else:
        programs = program.split()

    # Reject unknown names up front instead of failing with a KeyError
    # part-way through the run.
    unknown = [name for name in programs if name not in benchmarks]
    if unknown:
        print('[SPLASH] Unknown benchmark(s):', ' '.join(unknown))
        usage()
        sys.exit(2)

    print('[SPLASH] Benchmarks to run:', ' '.join(programs))
    print()

    start_dir = os.getcwd()
    ret_codes = []  # non-zero exit codes, in run order
    for bm_name in programs:
        bm = benchmarks[bm_name]
        print('[SPLASH]', '[========== Running benchmark', bm.name, '==========]')
        rundir_ = rundir if rundir else tempfile.mkdtemp()
        print('[SPLASH] Setting up run directory:', rundir_)
        mkNewDir(rundir_)
        os.chdir(rundir_)
        try:
            try:
                args = bm.prepare(inputsize, nthreads, rundir_)
            except UnsupportedParameter as e:
                # When running everything, skip benchmarks that do not
                # support the requested parameters; an explicit request
                # is an error.
                if program == 'all':
                    print(e)
                    continue
                raise
            cmd = submit + ' ' + bm.run(args)
            print('[SPLASH]', 'Running \'' + cmd + '\':')
            print('[SPLASH]', '[---------- Beginning of output ----------]')
            retcode = run(cmd)
            if retcode != 0:
                ret_codes.append(retcode)
            print('[SPLASH]', '[----------    End of output    ----------]')
        finally:
            # Temp directories are removed on every path (including skip and
            # error); a user-supplied directory is left for inspection.
            if not rundir:
                # Leave the directory before deleting it so later commands
                # do not start in a non-existent working directory.
                os.chdir(start_dir)
                shutil.rmtree(rundir_, ignore_errors=True)
        print('[SPLASH]', 'Done.')

    if ret_codes:
        sys.exit(ret_codes[0])
